In [1]:
import os
import numpy as np
import pandas as pd
import cv2
import matplotlib.pylab as plt

from plotly.subplots import make_subplots
import plotly.graph_objects as go
import plotly.express as px
import matplotlib.pylab as plt
from IPython.display import display, clear_output
from ipywidgets import interact, widgets
In [2]:
# Root folder of the challenge data; change this one value if the dataset moves.
DATA_DIR = "CodingChallenge_v2"

# Table with one row per image: file name plus the two perspective scores.
csv_path = f"{DATA_DIR}/car_imgs_4000.csv"
# Folder holding the image files referenced by the `filename` column.
images_folder = f"{DATA_DIR}/imgs"
In [3]:
# Load the annotation table from the CSV configured above.
df = pd.read_csv(csv_path)

# Fail fast with a readable message if the file does not carry the columns
# that the rest of the notebook accesses by name.
expected_columns = {
    "filename",
    "perspective_score_hood",
    "perspective_score_backdoor_left",
}
missing_columns = expected_columns - set(df.columns)
assert not missing_columns, f"CSV is missing expected columns: {sorted(missing_columns)}"

# Preview the first rows.
df.head()
Out[3]:
filename perspective_score_hood perspective_score_backdoor_left
0 66ee2d88-f403-4821-a370-8a3d72e200f9.jpg 0.27352 0.439526
1 bf7e237e-f12b-4ffc-8d79-6855cc35ea06.jpg 0.00000 0.902682
2 4c297488-c81f-464d-92bf-1c408333a912.jpg 0.00000 0.900864
3 6c95ad0b-fda6-42a8-a33d-b430073e1dcd.jpg 0.00000 0.213162
4 bd806d94-4c2e-4cd2-8e02-5eaba7c7c63e.jpg 0.00000 0.818388
In [4]:
# (number of rows, number of columns) of the loaded table.
df.shape
Out[4]:
(4000, 3)
In [5]:
# Shape of the subset of rows whose hood score is strictly greater than zero.
df.loc[df["perspective_score_hood"].gt(0)].shape
Out[5]:
(1897, 3)
In [6]:
# Average score per target column, shown as (hood, backdoor_left).
(
    df["perspective_score_hood"].mean(),
    df["perspective_score_backdoor_left"].mean(),
)
Out[6]:
(0.30303198100175, 0.31336786319750004)

This does not look like a great imbalance, at least at first glance¶

In [7]:
# Side-by-side histograms of the two score columns, one subplot per column.
score_columns = ["perspective_score_hood", "perspective_score_backdoor_left"]

fig = make_subplots(rows=1, cols=len(score_columns))
for subplot_col, score_column in enumerate(score_columns, start=1):
    fig.add_trace(
        go.Histogram(x=df[score_column], name=score_column),
        row=1, col=subplot_col)
    # Label the axes so each panel can be read on its own.
    fig.update_xaxes(title_text=score_column, row=1, col=subplot_col)
    fig.update_yaxes(title_text="count", row=1, col=subplot_col)

fig.update_layout(title_text="Distribution of perspective scores")

# Last expression of the cell: renders the figure.
fig

Even the scores have a similar distribution, interesting¶

In [8]:
# Pull each column out as a plain Python list; the viewer cells further down
# look entries up by integer position.
images = df["filename"].tolist()
hood_scores = df["perspective_score_hood"].tolist()
backdoor_scores = df["perspective_score_backdoor_left"].tolist()
In [9]:
# Slider over every valid position in `images`, starting at the first image.
slider = widgets.IntSlider(
    value=0,
    min=0,
    max=len(images) - 1)


@interact(file=slider)
def plot_file(file):
    """Show the image at position `file` with both of its scores in the title.

    Args:
        file: integer index into `images`, `hood_scores` and `backdoor_scores`.

    Raises:
        FileNotFoundError: if OpenCV cannot read the image file.
    """
    img_path = f"{images_folder}/{images[file]}"
    img = cv2.imread(img_path)
    # cv2.imread reports a missing or unreadable file by returning None
    # instead of raising; surface that as a clear error naming the path.
    if img is None:
        raise FileNotFoundError(f"Could not read image: {img_path}")
    # OpenCV decodes to BGR channel order, matplotlib expects RGB.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    plt.imshow(img)
    plt.title(f"hood score: {hood_scores[file]}, backdoor score: {backdoor_scores[file]}")

There are quite a lot of images which are indeed a mixture of classes; there are also pictures which contain none of the classes. This should also contribute to better generalization.¶

In [10]:
# Slider over every valid position in `images`.
# NOTE(review): 3709 looks like a hand-picked example index; confirm which
# case it is meant to illustrate.
slider = widgets.IntSlider(
    value=3709,
    min=0,
    max=len(images) - 1)


@interact(file=slider)
def plot_file(file):
    """Show the image at position `file` with both of its scores in the title.

    Args:
        file: integer index into `images`, `hood_scores` and `backdoor_scores`.

    Raises:
        FileNotFoundError: if OpenCV cannot read the image file.
    """
    img_path = f"{images_folder}/{images[file]}"
    img = cv2.imread(img_path)
    # cv2.imread reports a missing or unreadable file by returning None
    # instead of raising; surface that as a clear error naming the path.
    if img is None:
        raise FileNotFoundError(f"Could not read image: {img_path}")
    # OpenCV decodes to BGR channel order, matplotlib expects RGB.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    plt.imshow(img)
    plt.title(f"hood score: {hood_scores[file]}, backdoor score: {backdoor_scores[file]}")

I have my doubts about some pictures, where I would say the scores are off. I will use them in the following training anyway, but I am raising a yellow flag that should be settled in a discussion. For example, the following picture seems to contain the left backdoor while both scores are 0. I would have no problem with that if the task clearly stated that a score above 0 is needed only for objects which fully fit into the image¶

In [11]:
# Slider over every valid position in `images`, starting at image 3289: the
# example discussed in the markdown cell just above (a left back door appears
# to be visible although both scores are 0).
slider = widgets.IntSlider(
    value=3289,
    min=0,
    max=len(images) - 1)


@interact(file=slider)
def plot_file(file):
    """Show the image at position `file` with both of its scores in the title.

    Args:
        file: integer index into `images`, `hood_scores` and `backdoor_scores`.

    Raises:
        FileNotFoundError: if OpenCV cannot read the image file.
    """
    img_path = f"{images_folder}/{images[file]}"
    img = cv2.imread(img_path)
    # cv2.imread reports a missing or unreadable file by returning None
    # instead of raising; surface that as a clear error naming the path.
    if img is None:
        raise FileNotFoundError(f"Could not read image: {img_path}")
    # OpenCV decodes to BGR channel order, matplotlib expects RGB.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    plt.imshow(img)
    plt.title(f"hood score: {hood_scores[file]}, backdoor score: {backdoor_scores[file]}")

I will get down to the main business and come back, if I have more time¶

In [ ]: